root_path <- "~/Desktop/raw_data_must/"
chrom_path <- paste0(root_path, "Chromium/")
vis_path <- paste0(root_path, "Visium/outs/")
xe_path <- paste0(root_path, "Xenium/outs/")
aggxe_path <- paste0(root_path, "AggXe/outs/")
suppressMessages({
library(Seurat)
library(BayesSpace)
library(ggplot2)
library(patchwork)
library(dplyr)
library(tibble)
library(scater)
library(scran)
library(scuttle)
library(grid)
library(hrbrthemes)
library(gridExtra)
library(SingleR)
library(magick)
library(harmony)
library(scales)
library(knitr)
library(here)
library(cowplot)
})
source("utils_new.R")
Read in Visium Seurat object, and spatially visualize the per spot total count
vis_path <- paste0(root_path, "Visium/outs/")
vis <- Load10X_Spatial(vis_path)
Idents(vis) <- ""
SpatialFeaturePlot(vis, features = "nCount_Spatial") + theme(legend.position = "right")
# saveRDS(vis, "./intermediate_data/vis_raw.rds")
QC Visium with the guidance from OSTA book. Library size
(sum in OSTA book, nCount_Spatial in Seurat),
number of features detected (detected in OSTA book,
nFeature_Spatial in Seurat), mitochondria percentage, and
number of cells per spot need to be checked. However, in this breast
cancer data, no ground truth of number of cells per spot is given like
in the Human DLPFC data in spatialLIBD. Therefore, we skip
the last criteria, and focusing on the first three.
Investigate the proper cut-off threshold. Note that Visium is not at
single cell resolution, so the automated QC functionality developed for
single-cell RNA-seq, such as scuttle::isOutlier(), should
be carefully double-checked to prevent falsely removal of spots in
spatial data.
Here we have a quick view of the distribution of the three variables of interest. We want to eliminate spots with low total library size, and low number of features detected per spot, and high mitochondria.
vis <- readRDS("./intermediate_data/vis_raw.rds")
# Prepare mitochondria percentage
vis$percent.mt <- PercentageFeatureSet(vis, pattern = "^MT-")
VlnPlot(vis, features = c("nCount_Spatial", "nFeature_Spatial", "percent.mt"), pt.size = 0.1)
Check the lower end of nCount_Spatial, and set the
threshold at 100 to visualize the effect of QC in
plotQCSpatial_seu(). However, note that the removal is not
necessary.
head(sort(vis$nCount_Spatial), 20)
## CCACGCCTGGAGAAGT-1 CGTGTAGACACCTTCC-1 CCAGCATCTACCGCTT-1 TGGCGACACTCGCATC-1
## 39 54 65 77
## TAGGTCTATGGATTAT-1 CTGTGCAACATTGAGG-1 CCGTACTGGACGCAGA-1 TCTGCAGTAGTTATGA-1
## 108 134 161 187
## GGTGCGTGGAGTTGGT-1 TCCGAGACAATACGCA-1 GATGGATCGTGGAGGT-1 CGTAGTCGGTTATCGA-1
## 189 204 391 524
## ACGCAAGGCGTCCAAT-1 CTCAATGACCGTGGTA-1 ACGAGGCTTAATCACG-1 AAGTCTACGCACTCGG-1
## 569 629 637 680
## TAACGAGCTATCCGTC-1 TCTGGTCACTATCTGT-1 ACGAAGTTCCAGCACC-1 GTCACTGTGATAACTC-1
## 757 764 784 785
table(isOutlier(vis$nCount_Spatial, type = "lower"))
##
## FALSE
## 4992
Check the lower end of nFeature_Spatial, and set the
threshold also at 100.
head(sort(vis$nFeature_Spatial), 20)
## CCACGCCTGGAGAAGT-1 CGTGTAGACACCTTCC-1 CCAGCATCTACCGCTT-1 TGGCGACACTCGCATC-1
## 38 51 65 72
## TAGGTCTATGGATTAT-1 CTGTGCAACATTGAGG-1 CCGTACTGGACGCAGA-1 GGTGCGTGGAGTTGGT-1
## 104 125 148 169
## TCTGCAGTAGTTATGA-1 TCCGAGACAATACGCA-1 GATGGATCGTGGAGGT-1 CGTAGTCGGTTATCGA-1
## 178 192 352 448
## ACGCAAGGCGTCCAAT-1 CTCAATGACCGTGGTA-1 ACGAGGCTTAATCACG-1 AAGTCTACGCACTCGG-1
## 518 537 561 568
## TCTGGTCACTATCTGT-1 ATGCTGACGACTCGCT-1 TAACGAGCTATCCGTC-1 TCGCCGGAAGGCAGGA-1
## 603 629 640 643
table(isOutlier(vis$nFeature_Spatial, type = "lower"))
##
## FALSE
## 4992
Check the higher end of percent.mt, and no spot has
unusually high mitochondria percentage.
tail(sort(vis$percent.mt), 20)
## GTCTGTGGATCTGACG-1 TCTAACAAGCCAATCC-1 TGCACCGGTCACCTCC-1 ATAGCACCTGCTGTCT-1
## 12.82434 12.84427 12.86702 12.87049
## TGGCGAGCTCCACGTG-1 TCGACCTGCAGCTCAG-1 CCTAATCAGGCCATTA-1 GAGCGGCAATGGAATA-1
## 12.88375 12.92543 13.01404 13.02536
## GCCGCTTCTAAGGTGA-1 TAGTGACTCGCTCCGC-1 ACGGTGTACGGTGCCA-1 ACGCAAGGCATAAGTC-1
## 13.07899 13.12256 13.12948 13.22388
## ATCGTAATCCACATGC-1 GGTTCTATACCGCTTG-1 AACATCTTAAGGCTCA-1 TACGGCCTTGTAGGAG-1
## 13.39920 13.42821 13.62140 13.64523
## CTCGGACTAACGTGGC-1 GCCATTGTAATGTCTT-1 GGACCATCACCGCCAA-1 AACCTGACAGTGCCGC-1
## 13.65560 13.85249 13.96749 14.03253
Plot QC result. The helper function here is inspired by the
Bioconductor function ggspavis::plotQC(spe), and here
plotQCSpatial_seu() takes a Seurat object with “Spatial”
assay and has spatial coordinates stored in its metadata.
vis$low_count_spots <- vis$nCount_Spatial < 100 | vis$nFeature_Spatial < 100
plotQCSpatial_seu(vis, flag = "low_count_spots")
Deriving low abundance gene flag. For genes, we require it to be detected in at least 20 spots. 66 genes will be removed.
vis_lowgenecount_drop <- rowSums(GetAssayData(vis, "counts") > 0) < 20
table(vis_lowgenecount_drop)
## vis_lowgenecount_drop
## FALSE TRUE
## 18019 66
Eliminate genes and spots did not pass QC:
vis <- vis[!vis_lowgenecount_drop, # low abundance genes
!vis$low_count_spots ] # low library size & number of detected genes per spot
dim(vis) # 18019 4988
## [1] 18019 4988
# saveRDS(vis, "./intermediate_data/vis_qcd.rds")
Normalization with SCTransform, as recommended by Seurat framework.
SCTransform() replaces the NormalizeData(),
ScaleData(), and FindVariableFeatures() steps,
with default number of variable features as 3000.
# vis <- readRDS("./intermediate_data/vis_qcd.rds")
vis <- SCTransform(vis, assay = "Spatial", verbose = FALSE)
Dimensionality reduction and clustering with Seurat.
vis <- RunPCA(vis, npcs = 50, assay = "SCT", verbose = FALSE)
vis <- RunUMAP(vis, reduction = "pca", dims = 1:50)
vis <- FindNeighbors(vis, reduction = "pca", dims = 1:50)
vis <- FindClusters(vis, verbose = FALSE)
# saveRDS(vis, "./intermediate_data/vis_qcd_dimred.rds")
Spatially variable genes (SVGs) identification based on spatial autocorrelation “Moran’s I” index. This step takes a long time to run, so for the sake of time, it is recommended to directly load in the intermediate data.
vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
# DO NOT RUN - TAKING TOO LONG
vis <- FindSpatiallyVariableFeatures(vis, assay = "SCT", features = VariableFeatures(vis), selection.method = "moransi")
vis_SVGs_6 = rownames(
dplyr::slice_min(
vis[["SCT"]]@meta.features,
moransi.spatially.variable.rank,
n = 6
)
)
Visualize top 6 Visium SVGs in UMAP space.
vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
vis_SVGs_6 = c("DCAF7", "MT-ND1", "CSDE1", "TACO1", "MIEN1", "NRAS")
FeaturePlot(vis, features = vis_SVGs_6, ncol = 3, raster = FALSE)
Visualize top 6 Visium SVGs spatially. Combining results from deconvolution, we can see that genes highly expressed in invasive tumor region has high spatial variability.
markers <- vis_SVGs_6
feat.plots <- purrr::map(markers, function(x) SpatialFeaturePlot(vis, x))
patchwork::wrap_plots(feat.plots, ncol=3)
Visium UMAP clustering colored by Seurat
vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
p1 <- DimPlot(vis) + ggtitle("Colored by Seurat Clustering")
p2 <- SpatialDimPlot(vis, label = TRUE, label.size = 3)
p1 + p2
Combining the information from deconvolution result later, we would
realize that cluster 1 is likely invasive tumor.
Spatial clustering with BayesSpace. First, we convert the Visium Seurat object to SingleCellExperiment object as required by BayesSpace.
We carry over everything we have calculated so far for Visium, except the spatial coordinates. Seurat::Load10X_Spatial() does not keep all the spatial columns required for BayesSpace (e.g. enhanced version), and it also changes the scaling of the tissue coordinates to match with H&E image, which is incompatible with BayesSpace. Instead of using SeuratObject::GetTissueCoordinates(vis), we read in the original spatial coordinates, with the following modification.
# First, convert Visium Seurat object to SCE required by BayesSpace
vis_mat <- vis@assays$Spatial@counts
vis_sct <- vis@assays$SCT@counts
vis_pca <- vis@reductions$pca@cell.embeddings
vis_umap <- vis@reductions$umap@cell.embeddings
# Merge in spatial coordinates from original tissue_position.csv.
vis_coord <- read.csv(paste0(vis_path, "spatial/tissue_positions_list.csv"), header = FALSE)
colnames(vis_coord) <- c("barcodes", "in_tissue", "row", "col", "pxl_row_in_fullres", "pxl_col_in_fullres")
vis_meta <- vis@meta.data
vis_meta$barcodes <- rownames(vis_meta)
vis_CD <- vis_meta %>%
left_join(vis_coord)
vis_sce <- SingleCellExperiment(assays = list(counts = vis_mat,
SCTcounts = vis_sct),
reducedDims = SimpleList(PCA = vis_pca, UMAP = vis_umap),
colData = vis_CD)
# saveRDS(vis_sce, "./intermediate_data/vis_qcd_dimred_sce.rds")
We assign the same number of clusters to BayesSpace identified by
Seurat (n.cluster = 18), since we also get the confirmation of a similar
number of cluster from Chromium (n.cluster = 19, see
Chromium.Rmd and the Visium deconvolution
section in this Markdown). This step takes a long time to run, so it’s
recommended to run on HPC, and save the results.
# Just FYI, there is also a speed-up version of the original Bioconductor R package BayesSpace from GitHub, for the enhanced resolution of BayesSpace (which we won't cover in this tutorial)
# devtools::install_github(senbaikang/BayesSpace)
library(BayesSpace)
vis_sce <- readRDS("./intermediate_data/vis_qcd_dimred_sce.rds")
# DO NOT RUN - TAKING TOO LONG
# We assign the same number of clusters to BayesSpace identified by Seurat
vis_sce <- vis_sce %>%
spatialCluster(q = 18, use.dimred = "PCA", platform = "Visium", nrep = 10000)
# saveRDS(vis_sce, "./intermediate_data/vis_qcd_dimred_sce_bayesspace.rds")
Save Visium BayesSpace clustering result back to Visium Seurat object for plotting
vis <- readRDS("./intermediate_data/vis_qcd_dimred.rds")
vis_sce <- readRDS("./intermediate_data/vis_qcd_dimred_sce_bayesspace.rds")
vis[["spatial.cluster"]] <- vis_sce$spatial.cluster
Visualize UMAP and spatial plot of Visium, colored by clustering with Seurat and BayesSpace.
p3 <- DimPlot(vis, group.by = "spatial.cluster") + ggtitle("Colored by BayesSpace spatial clustering")
p4 <- SpatialDimPlot(vis, group.by = "spatial.cluster", label = TRUE, label.size = 3)
p3 + p4
We can see that BayesSpace recovers the structure of DCIS 2 (cluster 1)
better compared to Seurat clustering, which does not take into account
the spatial coordinate information.
From here on, you can perform differential expression analysis between the clusters.
library(spacexr)
chrom <- readRDS("./intermediate_data/chrom_raw.rds")
# Reference Chromium, unnormalized counts
chrom_mat <- GetAssayData(chrom, slot = "counts")
cell_types <- as.factor(chrom$Annotation); names(cell_types) <- colnames(chrom)
ref <- Reference(chrom_mat, cell_types)
# Visium to be deconvolved, unnormalized counts
coords <- GetTissueCoordinates(vis)
counts <- GetAssayData(vis, slot = "counts")
puck <- SpatialRNA(coords, counts)
We specify “full” mode here to indicate there can be many cells in a spot, and save RCTD results.
# DO NOT RUN - TAKING TOO LONG
myRCTD <- create.RCTD(puck, ref, max_cores = 4)
myRCTD <- run.RCTD(myRCTD, doublet_mode = 'full')
# saveRDS(myRCTD, "./intermediate_data/vis_RCTD.rds")
Organize cell-type deconvolution results, the weights should be normalized so the proportion of each cell type in a spot sums up to 1.
myRCTD <- readRDS("./intermediate_data/vis_RCTD.rds")
results <- myRCTD@results
norm_weights <- normalize_weights(results$weights)
cell_type_names <- myRCTD@cell_type_info$info[[2]]
spatialRNA <- myRCTD@spatialRNA
barcodes <- colnames(myRCTD@spatialRNA@counts)
Plot cell-type deconvolution result. We first check the deconvolution result for one cell-type. Here we set the intensity range based on the probability score range of this cell-type across all spots.
# range(norm_weights[, cell_type_names[1]])
# 1.072845e-05 3.576271e-01
plot_puck_continuous(spatialRNA, barcodes, norm_weights[, cell_type_names[1]], title = cell_type_names[1], ylimit = c(0, 0.4), size = 0.8) + coord_flip() + scale_x_reverse()
We then separate the remaining cell types into two batches for the sake of clarity in visualization. All these plots have the color scale intensity ranging from 0 to 1. (Note the plot is stretched horizontally in the below limited plotting space, but the orientation matches the H&E spatial feature plot of Visium.)
p <- list()
for (i in 2:10){
which_cell_type <- cell_type_names[i]
p[[i-1]] <- plot_puck_continuous(spatialRNA, barcodes, norm_weights[, which_cell_type], title = which_cell_type) + coord_flip() + scale_x_reverse() + theme(legend.position = "none")
}
do.call(grid.arrange, p)
p <- list()
for (i in 11:19){
which_cell_type <- cell_type_names[i]
p[[i-10]] <- plot_puck_continuous(spatialRNA, barcodes, norm_weights[, which_cell_type], title = which_cell_type) + coord_flip() + scale_x_reverse() + theme(legend.position = "none")
}
do.call(grid.arrange, p)
In addition, you can save the plots with the commented code.
## Create a local directory for saved plots
# RCTD_result_path <- "./results/"
# for (i in 1:length(cell_type_names)){
# which_cell_type <- cell_type_names[i]
# plot_puck_continuous(puck, barcodes, norm_weights[, which_cell_type], title = which_cell_type) + coord_flip() +
# scale_x_reverse()
# ggsave(path = RCTD_result_path, filename = paste0(which_cell_type, ".png"))
# }